#1 Import packages
import re

#2 HTML text to parse (inline sample page containing a three-item book list)
html_text = '''

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>

<ul class="BookList">
  <li class="book1" id="book_01" href="http://www.biancheng.net/">
        <p class="name">c语言小白变怪兽</p>
        <p class="model"><a href="#">纸质书</a></p>
        <p class="price"><a href="http://localhost">80</a></p>
        <p class="color">红蓝色封装</p>
    </li>

    <li class="book2" id="book_02" href="http://www.biancheng.net/">
        <p class="name">Python入门到精通</p>
        <p class="model"><a id="dianzishu">电子书</a></p>
        <p class="price"><a>45</a></p>
        <p class="color">蓝绿色封装</p>
    </li>

    <li class="book2" id="book_03" href="http://www.biancheng.net/">
        <p class="name">悟空传</p>
        <p class="model"><a >电子书</a></p>
        <p class="price"><a>99</a></p>
        <p class="color">黑色封装</p>
    </li>
</ul>

</body>
</html>
'''

#3 Use regular expressions to pull out the details of every book
# Method 1 (kept for reference): one findall per field, combined by index.
# names = re.findall(r'<li.*?<p.*?>(.*?)</p>', html_text,re.S)
# print(names)
# models = re.findall(r'<li.*?</p>.*?<p.*?><a.*?>(.*?)</a>', html_text,re.S)
# print(models)
# prices = re.findall(r'<li.*?</p>.*?</p>.*?<p.*?<a.*?>(.*?)</a>', html_text,re.S)
# print(prices)
# colors = re.findall(r'<li.*?</p>.*?</p>.*?</p>.*?<p.*?>(.*?)</p>', html_text,re.S)
# print(colors)
# # Write the book details to a file
# with open("./books.txt", 'w', encoding='utf-8') as file:
#     for i in range(len(names)):
#         file.write(f"书名: {names[i]}, 格式: {models[i]}, 价格: {prices[i]}, 颜色: {colors[i]}"+"\n")


#3 Method 2: a single pattern with four capture groups, so every match is
# one (name, model, price, color) tuple for one <li> entry.
book_pattern = re.compile(
    r'<li.*?<p.*?>(.*?)</p>'
    r'.*?<p.*?<a.*?>(.*?)</a>'
    r'.*?<p.*?<a.*?>(.*?)</a>'
    r'.*?<p.*?>(.*?)</p>',
    re.S,  # let "." match newlines, because each <li> spans several lines
)
results = book_pattern.findall(html_text)
print(results)

with open("./books2.txt", 'w', encoding='utf-8') as out:
    # Each match is a tuple, and write()/writelines() only accept strings,
    # so every tuple is converted with str() and written on its own line.
    out.writelines(str(record) + "\n" for record in results)



#4 Save to a txt file (already done above: method 2 writes ./books2.txt)
